Libraries
# if (!require("renv")) install.packages("renv")
# library(renv)
# renv::restore()
library(here)
library(dplyr)
library(readr)
library(arrow)
Read data
# Location of the raw census archive, relative to the project root.
zip_file <- here("data", "raw", "iter_00_cpv2020_csv.zip")

# Scratch directory inside the project that receives the extracted files.
temp_dir <- here("temp")
dir.create(temp_dir, showWarnings = FALSE)

# Paths of the two archive members we need: the census table and its data
# dictionary. They are defined once so the extraction request and the read
# paths cannot drift apart.
data_member <- file.path(
  "iter_00_cpv2020",
  "conjunto_de_datos",
  "conjunto_de_datos_iter_00CSV20.csv"
)
dict_member <- file.path(
  "iter_00_cpv2020",
  "diccionario_datos",
  "diccionario_datos_iter_00CSV20.csv"
)
unzip(zip_file, files = c(data_member, dict_member), exdir = temp_dir)

# temp_dir is already an absolute path, so extend it with file.path() rather
# than passing it back through here(), which expects components below the
# project root.
data_path <- file.path(temp_dir, data_member)
dict_path <- file.path(temp_dir, dict_member)

# unzip() only warns when a requested member is missing; stop here with a
# clear message instead of failing later inside read_csv().
stopifnot(file.exists(data_path), file.exists(dict_path))

# Read the data dictionary exactly as stored in the file.
info_dict <- read_csv(dict_path)
New names:Rows: 290 Columns: 10── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): ...1, ...2, ...3, ...4, ...5, ...6
lgl (4): ...7, ...8, ...9, ...10
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the census table that was extracted from the archive.
# NOTE(review): the column specification printed below reports 283 of the 286
# columns as character, including count columns such as POBFEM - presumably
# because of non-numeric placeholder values in the file; confirm and convert
# before any numeric analysis.
df <- read_csv(data_path)
Rows: 195662 Columns: 286── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (283): ENTIDAD, NOM_ENT, MUN, NOM_MUN, LOC, NOM_LOC, LONGITUD, LATITUD, ALTITUD, POBFEM, POBMAS, P_0A2, P_0A2_F, P_0A2_M, P_3YMAS, P_3YMAS_F, P_3Y...
dbl (3): POBTOT, VIVTOT, TVIVHAB
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Both tables have been read, so the scratch directory can be removed.
unlink(temp_dir, recursive = TRUE)

# Save the dictionary, as parsed, alongside the raw data.
dict_out_path <- here("data", "raw", "diccionario_datos_iter_00CSV20.csv")
write_csv(info_dict, dict_out_path)
Exploration
# Preview the first rows of the census table and of the raw data dictionary.
head(df)
head(info_dict)
# Show the parsed type of every column in the census table.
str(df)
spc_tbl_ [195,662 × 286] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ ENTIDAD : chr [1:195662] "00" "00" "00" "01" ...
$ NOM_ENT : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Aguascalientes" ...
$ MUN : chr [1:195662] "000" "000" "000" "000" ...
$ NOM_MUN : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Total de la entidad Aguascalientes" ...
$ LOC : chr [1:195662] "0000" "9998" "9999" "0000" ...
$ NOM_LOC : chr [1:195662] "Total nacional" "Localidades de una vivienda" "Localidades de dos viviendas" "Total de la Entidad" ...
$ LONGITUD : chr [1:195662] NA NA NA NA ...
$ LATITUD : chr [1:195662] NA NA NA NA ...
$ ALTITUD : chr [1:195662] NA NA NA NA ...
$ POBTOT : num [1:195662] 1.26e+08 2.50e+05 1.47e+05 1.43e+06 3.70e+03 ...
$ POBFEM : chr [1:195662] "64540634" "96869" "61324" "728924" ...
$ POBMAS : chr [1:195662] "61473390" "153485" "85801" "696683" ...
$ P_0A2 : chr [1:195662] "5764054" "10493" "6798" "71864" ...
$ P_0A2_F : chr [1:195662] "2848875" "5193" "3407" "35604" ...
$ P_0A2_M : chr [1:195662] "2915179" "5300" "3391" "36260" ...
$ P_3YMAS : chr [1:195662] "119976584" "239441" "139757" "1352235" ...
$ P_3YMAS_F : chr [1:195662] "61554567" "91463" "57628" "692561" ...
$ P_3YMAS_M : chr [1:195662] "58422017" "147978" "82129" "659674" ...
$ P_5YMAS : chr [1:195662] "115693273" "232086" "135028" "1299669" ...
$ P_5YMAS_F : chr [1:195662] "59433559" "87931" "55256" "666713" ...
$ P_5YMAS_M : chr [1:195662] "56259714" "144155" "79772" "632956" ...
$ P_12YMAS : chr [1:195662] "100528155" "207748" "119223" "1116719" ...
$ P_12YMAS_F : chr [1:195662] "51962264" "76111" "47543" "576593" ...
$ P_12YMAS_M : chr [1:195662] "48565891" "131637" "71680" "540126" ...
$ P_15YMAS : chr [1:195662] "93985354" "197411" "111530" "1038904" ...
$ P_15YMAS_F : chr [1:195662] "48732991" "71344" "44275" "538387" ...
$ P_15YMAS_M : chr [1:195662] "45252363" "126067" "67255" "500517" ...
$ P_18YMAS : chr [1:195662] "87492680" "186968" "104612" "960764" ...
$ P_18YMAS_F : chr [1:195662] "45530857" "66514" "41184" "500089" ...
$ P_18YMAS_M : chr [1:195662] "41961823" "120454" "63428" "460675" ...
$ P_3A5 : chr [1:195662] "6462212" "10900" "7028" "78833" ...
$ P_3A5_F : chr [1:195662] "3193548" "5270" "3511" "38679" ...
$ P_3A5_M : chr [1:195662] "3268664" "5630" "3517" "40154" ...
$ P_6A11 : chr [1:195662] "12986217" "20793" "13506" "156683" ...
$ P_6A11_F : chr [1:195662] "6398755" "10082" "6574" "77289" ...
$ P_6A11_M : chr [1:195662] "6587462" "10711" "6932" "79394" ...
$ P_8A14 : chr [1:195662] "15287375" "24342" "16724" "181905" ...
$ P_8A14_F : chr [1:195662] "7531118" "11538" "7679" "89383" ...
$ P_8A14_M : chr [1:195662] "7756257" "12804" "9045" "92522" ...
$ P_12A14 : chr [1:195662] "6542801" "10337" "7693" "77815" ...
$ P_12A14_F : chr [1:195662] "3229273" "4767" "3268" "38206" ...
$ P_12A14_M : chr [1:195662] "3313528" "5570" "4425" "39609" ...
$ P_15A17 : chr [1:195662] "6492674" "10443" "6918" "78140" ...
$ P_15A17_F : chr [1:195662] "3202134" "4830" "3091" "38298" ...
$ P_15A17_M : chr [1:195662] "3290540" "5613" "3827" "39842" ...
$ P_18A24 : chr [1:195662] "14736111" "27841" "16336" "180847" ...
$ P_18A24_F : chr [1:195662] "7398617" "11140" "6760" "90632" ...
$ P_18A24_M : chr [1:195662] "7337494" "16701" "9576" "90215" ...
$ P_15A49_F : chr [1:195662] "33885546" "47693" "29297" "388917" ...
$ P_60YMAS : chr [1:195662] "15142976" "37383" "21277" "145376" ...
$ P_60YMAS_F : chr [1:195662] "8139094" "13442" "8916" "78703" ...
$ P_60YMAS_M : chr [1:195662] "7003882" "23941" "12361" "66673" ...
$ REL_H_M : chr [1:195662] "95.25" "158.45" "139.91" "95.58" ...
$ POB0_14 : chr [1:195662] "31755284" "52523" "35025" "385195" ...
$ POB15_64 : chr [1:195662] "83663440" "171209" "96250" "941834" ...
$ POB65_MAS : chr [1:195662] "10321914" "26202" "15280" "97070" ...
$ P_0A4 : chr [1:195662] "10047365" "17848" "11527" "124430" ...
$ P_0A4_F : chr [1:195662] "4969883" "8725" "5779" "61452" ...
$ P_0A4_M : chr [1:195662] "5077482" "9123" "5748" "62978" ...
$ P_5A9 : chr [1:195662] "10764379" "17380" "11274" "131048" ...
$ P_5A9_F : chr [1:195662] "5311288" "8526" "5558" "64689" ...
$ P_5A9_M : chr [1:195662] "5453091" "8854" "5716" "66359" ...
$ P_10A14 : chr [1:195662] "10943540" "17295" "12224" "129717" ...
$ P_10A14_F : chr [1:195662] "5389280" "8061" "5423" "63637" ...
$ P_10A14_M : chr [1:195662] "5554260" "9234" "6801" "66080" ...
$ P_15A19 : chr [1:195662] "10806690" "18303" "11484" "131967" ...
$ P_15A19_F : chr [1:195662] "5344540" "8138" "5140" "65064" ...
$ P_15A19_M : chr [1:195662] "5462150" "10165" "6344" "66903" ...
$ P_20A24 : chr [1:195662] "10422095" "19981" "11770" "127020" ...
$ P_20A24_F : chr [1:195662] "5256211" "7832" "4711" "63866" ...
$ P_20A24_M : chr [1:195662] "5165884" "12149" "7059" "63154" ...
$ P_25A29 : chr [1:195662] "9993001" "20584" "12238" "118426" ...
$ P_25A29_F : chr [1:195662] "5131597" "7125" "4427" "60285" ...
$ P_25A29_M : chr [1:195662] "4861404" "13459" "7811" "58141" ...
$ P_30A34 : chr [1:195662] "9420827" "19601" "11315" "106825" ...
$ P_30A34_F : chr [1:195662] "4893101" "6309" "4074" "55174" ...
$ P_30A34_M : chr [1:195662] "4527726" "13292" "7241" "51651" ...
$ P_35A39 : chr [1:195662] "9020276" "18645" "10357" "99257" ...
$ P_35A39_F : chr [1:195662] "4688746" "6289" "3825" "51483" ...
$ P_35A39_M : chr [1:195662] "4331530" "12356" "6532" "47774" ...
$ P_40A44 : chr [1:195662] "8503586" "17934" "9705" "92378" ...
$ P_40A44_F : chr [1:195662] "4441282" "6060" "3743" "48539" ...
$ P_40A44_M : chr [1:195662] "4062304" "11874" "5962" "43839" ...
$ P_45A49 : chr [1:195662] "7942413" "16840" "8668" "84669" ...
$ P_45A49_F : chr [1:195662] "4130069" "5940" "3377" "44506" ...
$ P_45A49_M : chr [1:195662] "3812344" "10900" "5291" "40163" ...
$ P_50A54 : chr [1:195662] "7037532" "15070" "7878" "74121" ...
$ P_50A54_F : chr [1:195662] "3705369" "5481" "3239" "39510" ...
$ P_50A54_M : chr [1:195662] "3332163" "9589" "4639" "34611" ...
$ P_55A59 : chr [1:195662] "5695958" "13070" "6838" "58865" ...
$ P_55A59_F : chr [1:195662] "3002982" "4728" "2823" "31257" ...
$ P_55A59_M : chr [1:195662] "2692976" "8342" "4015" "27608" ...
$ P_60A64 : chr [1:195662] "4821062" "11181" "5997" "48306" ...
$ P_60A64_F : chr [1:195662] "2563200" "4050" "2511" "25871" ...
$ P_60A64_M : chr [1:195662] "2257862" "7131" "3486" "22435" ...
$ P_65A69 : chr [1:195662] "3645077" "9160" "5052" "35823" ...
$ P_65A69_F : chr [1:195662] "1938227" "3343" "2130" "19125" ...
$ P_65A69_M : chr [1:195662] "1706850" "5817" "2922" "16698" ...
$ P_70A74 : chr [1:195662] "2647340" "6903" "3852" "25586" ...
[list output truncated]
- attr(*, "spec")=
.. cols(
.. ENTIDAD = col_character(),
.. NOM_ENT = col_character(),
.. MUN = col_character(),
.. NOM_MUN = col_character(),
.. LOC = col_character(),
.. NOM_LOC = col_character(),
.. LONGITUD = col_character(),
.. LATITUD = col_character(),
.. ALTITUD = col_character(),
.. POBTOT = col_double(),
.. POBFEM = col_character(),
.. POBMAS = col_character(),
.. P_0A2 = col_character(),
.. P_0A2_F = col_character(),
.. P_0A2_M = col_character(),
.. P_3YMAS = col_character(),
.. P_3YMAS_F = col_character(),
.. P_3YMAS_M = col_character(),
.. P_5YMAS = col_character(),
.. P_5YMAS_F = col_character(),
.. P_5YMAS_M = col_character(),
.. P_12YMAS = col_character(),
.. P_12YMAS_F = col_character(),
.. P_12YMAS_M = col_character(),
.. P_15YMAS = col_character(),
.. P_15YMAS_F = col_character(),
.. P_15YMAS_M = col_character(),
.. P_18YMAS = col_character(),
.. P_18YMAS_F = col_character(),
.. P_18YMAS_M = col_character(),
.. P_3A5 = col_character(),
.. P_3A5_F = col_character(),
.. P_3A5_M = col_character(),
.. P_6A11 = col_character(),
.. P_6A11_F = col_character(),
.. P_6A11_M = col_character(),
.. P_8A14 = col_character(),
.. P_8A14_F = col_character(),
.. P_8A14_M = col_character(),
.. P_12A14 = col_character(),
.. P_12A14_F = col_character(),
.. P_12A14_M = col_character(),
.. P_15A17 = col_character(),
.. P_15A17_F = col_character(),
.. P_15A17_M = col_character(),
.. P_18A24 = col_character(),
.. P_18A24_F = col_character(),
.. P_18A24_M = col_character(),
.. P_15A49_F = col_character(),
.. P_60YMAS = col_character(),
.. P_60YMAS_F = col_character(),
.. P_60YMAS_M = col_character(),
.. REL_H_M = col_character(),
.. POB0_14 = col_character(),
.. POB15_64 = col_character(),
.. POB65_MAS = col_character(),
.. P_0A4 = col_character(),
.. P_0A4_F = col_character(),
.. P_0A4_M = col_character(),
.. P_5A9 = col_character(),
.. P_5A9_F = col_character(),
.. P_5A9_M = col_character(),
.. P_10A14 = col_character(),
.. P_10A14_F = col_character(),
.. P_10A14_M = col_character(),
.. P_15A19 = col_character(),
.. P_15A19_F = col_character(),
.. P_15A19_M = col_character(),
.. P_20A24 = col_character(),
.. P_20A24_F = col_character(),
.. P_20A24_M = col_character(),
.. P_25A29 = col_character(),
.. P_25A29_F = col_character(),
.. P_25A29_M = col_character(),
.. P_30A34 = col_character(),
.. P_30A34_F = col_character(),
.. P_30A34_M = col_character(),
.. P_35A39 = col_character(),
.. P_35A39_F = col_character(),
.. P_35A39_M = col_character(),
.. P_40A44 = col_character(),
.. P_40A44_F = col_character(),
.. P_40A44_M = col_character(),
.. P_45A49 = col_character(),
.. P_45A49_F = col_character(),
.. P_45A49_M = col_character(),
.. P_50A54 = col_character(),
.. P_50A54_F = col_character(),
.. P_50A54_M = col_character(),
.. P_55A59 = col_character(),
.. P_55A59_F = col_character(),
.. P_55A59_M = col_character(),
.. P_60A64 = col_character(),
.. P_60A64_F = col_character(),
.. P_60A64_M = col_character(),
.. P_65A69 = col_character(),
.. P_65A69_F = col_character(),
.. P_65A69_M = col_character(),
.. P_70A74 = col_character(),
.. P_70A74_F = col_character(),
.. P_70A74_M = col_character(),
.. P_75A79 = col_character(),
.. P_75A79_F = col_character(),
.. P_75A79_M = col_character(),
.. P_80A84 = col_character(),
.. P_80A84_F = col_character(),
.. P_80A84_M = col_character(),
.. P_85YMAS = col_character(),
.. P_85YMAS_F = col_character(),
.. P_85YMAS_M = col_character(),
.. PROM_HNV = col_character(),
.. PNACENT = col_character(),
.. PNACENT_F = col_character(),
.. PNACENT_M = col_character(),
.. PNACOE = col_character(),
.. PNACOE_F = col_character(),
.. PNACOE_M = col_character(),
.. PRES2015 = col_character(),
.. PRES2015_F = col_character(),
.. PRES2015_M = col_character(),
.. PRESOE15 = col_character(),
.. PRESOE15_F = col_character(),
.. PRESOE15_M = col_character(),
.. P3YM_HLI = col_character(),
.. P3YM_HLI_F = col_character(),
.. P3YM_HLI_M = col_character(),
.. P3HLINHE = col_character(),
.. P3HLINHE_F = col_character(),
.. P3HLINHE_M = col_character(),
.. P3HLI_HE = col_character(),
.. P3HLI_HE_F = col_character(),
.. P3HLI_HE_M = col_character(),
.. P5_HLI = col_character(),
.. P5_HLI_NHE = col_character(),
.. P5_HLI_HE = col_character(),
.. PHOG_IND = col_character(),
.. POB_AFRO = col_character(),
.. POB_AFRO_F = col_character(),
.. POB_AFRO_M = col_character(),
.. PCON_DISC = col_character(),
.. PCDISC_MOT = col_character(),
.. PCDISC_VIS = col_character(),
.. PCDISC_LENG = col_character(),
.. PCDISC_AUD = col_character(),
.. PCDISC_MOT2 = col_character(),
.. PCDISC_MEN = col_character(),
.. PCON_LIMI = col_character(),
.. PCLIM_CSB = col_character(),
.. PCLIM_VIS = col_character(),
.. PCLIM_HACO = col_character(),
.. PCLIM_OAUD = col_character(),
.. PCLIM_MOT2 = col_character(),
.. PCLIM_RE_CO = col_character(),
.. PCLIM_PMEN = col_character(),
.. PSIND_LIM = col_character(),
.. P3A5_NOA = col_character(),
.. P3A5_NOA_F = col_character(),
.. P3A5_NOA_M = col_character(),
.. P6A11_NOA = col_character(),
.. P6A11_NOAF = col_character(),
.. P6A11_NOAM = col_character(),
.. P12A14NOA = col_character(),
.. P12A14NOAF = col_character(),
.. P12A14NOAM = col_character(),
.. P15A17A = col_character(),
.. P15A17A_F = col_character(),
.. P15A17A_M = col_character(),
.. P18A24A = col_character(),
.. P18A24A_F = col_character(),
.. P18A24A_M = col_character(),
.. P8A14AN = col_character(),
.. P8A14AN_F = col_character(),
.. P8A14AN_M = col_character(),
.. P15YM_AN = col_character(),
.. P15YM_AN_F = col_character(),
.. P15YM_AN_M = col_character(),
.. P15YM_SE = col_character(),
.. P15YM_SE_F = col_character(),
.. P15YM_SE_M = col_character(),
.. P15PRI_IN = col_character(),
.. P15PRI_INF = col_character(),
.. P15PRI_INM = col_character(),
.. P15PRI_CO = col_character(),
.. P15PRI_COF = col_character(),
.. P15PRI_COM = col_character(),
.. P15SEC_IN = col_character(),
.. P15SEC_INF = col_character(),
.. P15SEC_INM = col_character(),
.. P15SEC_CO = col_character(),
.. P15SEC_COF = col_character(),
.. P15SEC_COM = col_character(),
.. P18YM_PB = col_character(),
.. P18YM_PB_F = col_character(),
.. P18YM_PB_M = col_character(),
.. GRAPROES = col_character(),
.. GRAPROES_F = col_character(),
.. GRAPROES_M = col_character(),
.. PEA = col_character(),
.. PEA_F = col_character(),
.. PEA_M = col_character(),
.. PE_INAC = col_character(),
.. PE_INAC_F = col_character(),
.. PE_INAC_M = col_character(),
.. POCUPADA = col_character(),
.. POCUPADA_F = col_character(),
.. POCUPADA_M = col_character(),
.. PDESOCUP = col_character(),
.. PDESOCUP_F = col_character(),
.. PDESOCUP_M = col_character(),
.. PSINDER = col_character(),
.. PDER_SS = col_character(),
.. PDER_IMSS = col_character(),
.. PDER_ISTE = col_character(),
.. PDER_ISTEE = col_character(),
.. PAFIL_PDOM = col_character(),
.. PDER_SEGP = col_character(),
.. PDER_IMSSB = col_character(),
.. PAFIL_IPRIV = col_character(),
.. PAFIL_OTRAI = col_character(),
.. P12YM_SOLT = col_character(),
.. P12YM_CASA = col_character(),
.. P12YM_SEPA = col_character(),
.. PCATOLICA = col_character(),
.. PRO_CRIEVA = col_character(),
.. POTRAS_REL = col_character(),
.. PSIN_RELIG = col_character(),
.. TOTHOG = col_character(),
.. HOGJEF_F = col_character(),
.. HOGJEF_M = col_character(),
.. POBHOG = col_character(),
.. PHOGJEF_F = col_character(),
.. PHOGJEF_M = col_character(),
.. VIVTOT = col_double(),
.. TVIVHAB = col_double(),
.. TVIVPAR = col_character(),
.. VIVPAR_HAB = col_character(),
.. VIVPARH_CV = col_character(),
.. TVIVPARHAB = col_character(),
.. VIVPAR_DES = col_character(),
.. VIVPAR_UT = col_character(),
.. OCUPVIVPAR = col_character(),
.. PROM_OCUP = col_character(),
.. PRO_OCUP_C = col_character(),
.. VPH_PISODT = col_character(),
.. VPH_PISOTI = col_character(),
.. VPH_1DOR = col_character(),
.. VPH_2YMASD = col_character(),
.. VPH_1CUART = col_character(),
.. VPH_2CUART = col_character(),
.. VPH_3YMASC = col_character(),
.. VPH_C_ELEC = col_character(),
.. VPH_S_ELEC = col_character(),
.. VPH_AGUADV = col_character(),
.. VPH_AEASP = col_character(),
.. VPH_AGUAFV = col_character(),
.. VPH_TINACO = col_character(),
.. VPH_CISTER = col_character(),
.. VPH_EXCSA = col_character(),
.. VPH_LETR = col_character(),
.. VPH_DRENAJ = col_character(),
.. VPH_NODREN = col_character(),
.. VPH_C_SERV = col_character(),
.. VPH_NDEAED = col_character(),
.. VPH_DSADMA = col_character(),
.. VPH_NDACMM = col_character(),
.. VPH_SNBIEN = col_character(),
.. VPH_REFRI = col_character(),
.. VPH_LAVAD = col_character(),
.. VPH_HMICRO = col_character(),
.. VPH_AUTOM = col_character(),
.. VPH_MOTO = col_character(),
.. VPH_BICI = col_character(),
.. VPH_RADIO = col_character(),
.. VPH_TV = col_character(),
.. VPH_PC = col_character(),
.. VPH_TELEF = col_character(),
.. VPH_CEL = col_character(),
.. VPH_INTER = col_character(),
.. VPH_STVP = col_character(),
.. VPH_SPMVPI = col_character(),
.. VPH_CVJ = col_character(),
.. VPH_SINRTV = col_character(),
.. VPH_SINLTC = col_character(),
.. VPH_SINCINT = col_character(),
.. VPH_SINTIC = col_character(),
.. TAMLOC = col_character()
.. )
- attr(*, "problems")=<externalptr>
# Tidy the raw dictionary: the first three rows are discarded, the following
# row holds the real column headers, and columns 7-10 are dropped.
#
# The unused columns are removed *before* renaming, and the header row is
# converted to a plain character vector. Assigning a one-row tibble that still
# contains the empty columns as names makes tibble warn that names must be a
# character vector and cannot be empty.
clean_info_dict <- info_dict[-c(1:3), -c(7:10)]

# The first remaining row carries the header labels.
header_labels <- as.character(unlist(clean_info_dict[1, ], use.names = FALSE))
names(clean_info_dict) <- header_labels

# Remove the header row now that it has been promoted to column names.
clean_info_dict <- clean_info_dict[-1, ]
clean_info_dict
# One row per distinct entity name found in the census table.
unique_states <- distinct(df, NOM_ENT)

# Persist the entity names, show them, then read them back as a plain vector.
entity_names_path <- here("data", "processed", "entity_names.csv")
write_csv(unique_states, entity_names_path)
unique_states
entities_csv <- pull(read_csv(entity_names_path))
Rows: 33 Columns: 1── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (1): NOM_ENT
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Show the entity names as read back from the CSV file.
print(entities_csv)
[1] "Total nacional" "Aguascalientes" "Baja California" "Baja California Sur"
[5] "Campeche" "Coahuila de Zaragoza" "Colima" "Chiapas"
[9] "Chihuahua" "Ciudad de México" "Durango" "Guanajuato"
[13] "Guerrero" "Hidalgo" "Jalisco" "México"
[17] "Michoacán de Ocampo" "Morelos" "Nayarit" "Nuevo León"
[21] "Oaxaca" "Puebla" "Querétaro" "Quintana Roo"
[25] "San Luis Potosí" "Sinaloa" "Sonora" "Tabasco"
[29] "Tamaulipas" "Tlaxcala" "Veracruz de Ignacio de la Llave" "Yucatán"
[33] "Zacatecas"
Selecting rows that we’ll analyze
# Positions (within the cleaned dictionary) of the variables kept for analysis.
rows_to_include <- c(1:12, 53:132, 136:140, 147, 155:211, 220:232)

# Flag the dictionary rows at those positions and take the matching entries of
# the fourth column, which are later used as column names of the census table.
keep_row <- seq_len(nrow(clean_info_dict)) %in% rows_to_include
filtered_data <- clean_info_dict[[4]][keep_row]
filtered_data
[1] "ENTIDAD" "NOM_ENT" "MUN" "NOM_MUN" "LOC" "NOM_LOC" "LONGITUD" "LATITUD" "ALTITUD" "POBTOT" "POBFEM"
[12] "POBMAS" "REL_H_M" "POB0_14" "POB15_64" "POB65_MAS" "P_0A4" "P_0A4_F" "P_0A4_M" "P_5A9" "P_5A9_F" "P_5A9_M"
[23] "P_10A14" "P_10A14_F" "P_10A14_M" "P_15A19" "P_15A19_F" "P_15A19_M" "P_20A24" "P_20A24_F" "P_20A24_M" "P_25A29" "P_25A29_F"
[34] "P_25A29_M" "P_30A34" "P_30A34_F" "P_30A34_M" "P_35A39" "P_35A39_F" "P_35A39_M" "P_40A44" "P_40A44_F" "P_40A44_M" "P_45A49"
[45] "P_45A49_F" "P_45A49_M" "P_50A54" "P_50A54_F" "P_50A54_M" "P_55A59" "P_55A59_F" "P_55A59_M" "P_60A64" "P_60A64_F" "P_60A64_M"
[56] "P_65A69" "P_65A69_F" "P_65A69_M" "P_70A74" "P_70A74_F" "P_70A74_M" "P_75A79" "P_75A79_F" "P_75A79_M" "P_80A84" "P_80A84_F"
[67] "P_80A84_M" "P_85YMAS" "P_85YMAS_F" "P_85YMAS_M" "PROM_HNV" "PNACENT" "PNACENT_F" "PNACENT_M" "PNACOE" "PNACOE_F" "PNACOE_M"
[78] "PRES2015" "PRES2015_F" "PRES2015_M" "PRESOE15" "PRESOE15_F" "PRESOE15_M" "P3YM_HLI" "P3YM_HLI_F" "P3YM_HLI_M" "P3HLINHE" "P3HLINHE_F"
[89] "P3HLINHE_M" "P3HLI_HE" "P3HLI_HE_F" "P3HLI_HE_M" "PHOG_IND" "POB_AFRO" "POB_AFRO_F" "POB_AFRO_M" "PCON_DISC" "PCON_LIMI" "PSIND_LIM"
[100] "P3A5_NOA" "P3A5_NOA_F" "P3A5_NOA_M" "P6A11_NOA" "P6A11_NOAF" "P6A11_NOAM" "P12A14NOA" "P12A14NOAF" "P12A14NOAM" "P15A17A" "P15A17A_F"
[111] "P15A17A_M" "P18A24A" "P18A24A_F" "P18A24A_M" "P8A14AN" "P8A14AN_F" "P8A14AN_M" "P15YM_AN" "P15YM_AN_F" "P15YM_AN_M" "P15YM_SE"
[122] "P15YM_SE_F" "P15YM_SE_M" "P15PRI_IN" "P15PRI_INF" "P15PRI_INM" "P15PRI_CO" "P15PRI_COF" "P15PRI_COM" "P15SEC_IN" "P15SEC_INF" "P15SEC_INM"
[133] "P15SEC_CO" "P15SEC_COF" "P15SEC_COM" "P18YM_PB" "P18YM_PB_F" "P18YM_PB_M" "GRAPROES" "GRAPROES_F" "GRAPROES_M" "PEA" "PEA_F"
[144] "PEA_M" "PE_INAC" "PE_INAC_F" "PE_INAC_M" "POCUPADA" "POCUPADA_F" "POCUPADA_M" "PDESOCUP" "PDESOCUP_F" "PDESOCUP_M" "PSINDER"
[155] "PDER_SS" "P12YM_SOLT" "P12YM_CASA" "P12YM_SEPA" "PCATOLICA" "PRO_CRIEVA" "POTRAS_REL" "PSIN_RELIG" "TOTHOG" "HOGJEF_F" "HOGJEF_M"
[166] "POBHOG" "PHOGJEF_F" "PHOGJEF_M"
# Keep only the census columns named in filtered_data. all_of() marks the
# character vector as an external selection (the bare-vector form is
# deprecated in tidyselect) and raises an error if any name is missing.
selected_df <- df |>
  select(all_of(filtered_data))
Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
Please use `all_of()` or `any_of()` instead.
# Was:
data %>% select(filtered_data)
# Now:
data %>% select(all_of(filtered_data))
See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
# Display the census table restricted to the selected variables.
selected_df
EDA before exporting
# Print the structure of the full census table once more.
# NOTE(review): this inspects `df`, not the `selected_df` subset built
# earlier - confirm which table was meant to be checked before exporting.
str(df)
spc_tbl_ [195,662 × 286] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
$ ENTIDAD : chr [1:195662] "00" "00" "00" "01" ...
$ NOM_ENT : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Aguascalientes" ...
$ MUN : chr [1:195662] "000" "000" "000" "000" ...
$ NOM_MUN : chr [1:195662] "Total nacional" "Total nacional" "Total nacional" "Total de la entidad Aguascalientes" ...
$ LOC : chr [1:195662] "0000" "9998" "9999" "0000" ...
$ NOM_LOC : chr [1:195662] "Total nacional" "Localidades de una vivienda" "Localidades de dos viviendas" "Total de la Entidad" ...
$ LONGITUD : chr [1:195662] NA NA NA NA ...
$ LATITUD : chr [1:195662] NA NA NA NA ...
$ ALTITUD : chr [1:195662] NA NA NA NA ...
$ POBTOT : num [1:195662] 1.26e+08 2.50e+05 1.47e+05 1.43e+06 3.70e+03 ...
$ POBFEM : chr [1:195662] "64540634" "96869" "61324" "728924" ...
$ POBMAS : chr [1:195662] "61473390" "153485" "85801" "696683" ...
$ P_0A2 : chr [1:195662] "5764054" "10493" "6798" "71864" ...
$ P_0A2_F : chr [1:195662] "2848875" "5193" "3407" "35604" ...
$ P_0A2_M : chr [1:195662] "2915179" "5300" "3391" "36260" ...
$ P_3YMAS : chr [1:195662] "119976584" "239441" "139757" "1352235" ...
$ P_3YMAS_F : chr [1:195662] "61554567" "91463" "57628" "692561" ...
$ P_3YMAS_M : chr [1:195662] "58422017" "147978" "82129" "659674" ...
$ P_5YMAS : chr [1:195662] "115693273" "232086" "135028" "1299669" ...
$ P_5YMAS_F : chr [1:195662] "59433559" "87931" "55256" "666713" ...
$ P_5YMAS_M : chr [1:195662] "56259714" "144155" "79772" "632956" ...
$ P_12YMAS : chr [1:195662] "100528155" "207748" "119223" "1116719" ...
$ P_12YMAS_F : chr [1:195662] "51962264" "76111" "47543" "576593" ...
$ P_12YMAS_M : chr [1:195662] "48565891" "131637" "71680" "540126" ...
$ P_15YMAS : chr [1:195662] "93985354" "197411" "111530" "1038904" ...
$ P_15YMAS_F : chr [1:195662] "48732991" "71344" "44275" "538387" ...
$ P_15YMAS_M : chr [1:195662] "45252363" "126067" "67255" "500517" ...
$ P_18YMAS : chr [1:195662] "87492680" "186968" "104612" "960764" ...
$ P_18YMAS_F : chr [1:195662] "45530857" "66514" "41184" "500089" ...
$ P_18YMAS_M : chr [1:195662] "41961823" "120454" "63428" "460675" ...
$ P_3A5 : chr [1:195662] "6462212" "10900" "7028" "78833" ...
$ P_3A5_F : chr [1:195662] "3193548" "5270" "3511" "38679" ...
$ P_3A5_M : chr [1:195662] "3268664" "5630" "3517" "40154" ...
$ P_6A11 : chr [1:195662] "12986217" "20793" "13506" "156683" ...
$ P_6A11_F : chr [1:195662] "6398755" "10082" "6574" "77289" ...
$ P_6A11_M : chr [1:195662] "6587462" "10711" "6932" "79394" ...
$ P_8A14 : chr [1:195662] "15287375" "24342" "16724" "181905" ...
$ P_8A14_F : chr [1:195662] "7531118" "11538" "7679" "89383" ...
$ P_8A14_M : chr [1:195662] "7756257" "12804" "9045" "92522" ...
$ P_12A14 : chr [1:195662] "6542801" "10337" "7693" "77815" ...
$ P_12A14_F : chr [1:195662] "3229273" "4767" "3268" "38206" ...
$ P_12A14_M : chr [1:195662] "3313528" "5570" "4425" "39609" ...
$ P_15A17 : chr [1:195662] "6492674" "10443" "6918" "78140" ...
$ P_15A17_F : chr [1:195662] "3202134" "4830" "3091" "38298" ...
$ P_15A17_M : chr [1:195662] "3290540" "5613" "3827" "39842" ...
$ P_18A24 : chr [1:195662] "14736111" "27841" "16336" "180847" ...
$ P_18A24_F : chr [1:195662] "7398617" "11140" "6760" "90632" ...
$ P_18A24_M : chr [1:195662] "7337494" "16701" "9576" "90215" ...
$ P_15A49_F : chr [1:195662] "33885546" "47693" "29297" "388917" ...
$ P_60YMAS : chr [1:195662] "15142976" "37383" "21277" "145376" ...
$ P_60YMAS_F : chr [1:195662] "8139094" "13442" "8916" "78703" ...
$ P_60YMAS_M : chr [1:195662] "7003882" "23941" "12361" "66673" ...
$ REL_H_M : chr [1:195662] "95.25" "158.45" "139.91" "95.58" ...
$ POB0_14 : chr [1:195662] "31755284" "52523" "35025" "385195" ...
$ POB15_64 : chr [1:195662] "83663440" "171209" "96250" "941834" ...
$ POB65_MAS : chr [1:195662] "10321914" "26202" "15280" "97070" ...
$ P_0A4 : chr [1:195662] "10047365" "17848" "11527" "124430" ...
$ P_0A4_F : chr [1:195662] "4969883" "8725" "5779" "61452" ...
$ P_0A4_M : chr [1:195662] "5077482" "9123" "5748" "62978" ...
$ P_5A9 : chr [1:195662] "10764379" "17380" "11274" "131048" ...
$ P_5A9_F : chr [1:195662] "5311288" "8526" "5558" "64689" ...
$ P_5A9_M : chr [1:195662] "5453091" "8854" "5716" "66359" ...
$ P_10A14 : chr [1:195662] "10943540" "17295" "12224" "129717" ...
$ P_10A14_F : chr [1:195662] "5389280" "8061" "5423" "63637" ...
$ P_10A14_M : chr [1:195662] "5554260" "9234" "6801" "66080" ...
$ P_15A19 : chr [1:195662] "10806690" "18303" "11484" "131967" ...
$ P_15A19_F : chr [1:195662] "5344540" "8138" "5140" "65064" ...
$ P_15A19_M : chr [1:195662] "5462150" "10165" "6344" "66903" ...
$ P_20A24 : chr [1:195662] "10422095" "19981" "11770" "127020" ...
$ P_20A24_F : chr [1:195662] "5256211" "7832" "4711" "63866" ...
$ P_20A24_M : chr [1:195662] "5165884" "12149" "7059" "63154" ...
$ P_25A29 : chr [1:195662] "9993001" "20584" "12238" "118426" ...
$ P_25A29_F : chr [1:195662] "5131597" "7125" "4427" "60285" ...
$ P_25A29_M : chr [1:195662] "4861404" "13459" "7811" "58141" ...
$ P_30A34 : chr [1:195662] "9420827" "19601" "11315" "106825" ...
$ P_30A34_F : chr [1:195662] "4893101" "6309" "4074" "55174" ...
$ P_30A34_M : chr [1:195662] "4527726" "13292" "7241" "51651" ...
$ P_35A39 : chr [1:195662] "9020276" "18645" "10357" "99257" ...
$ P_35A39_F : chr [1:195662] "4688746" "6289" "3825" "51483" ...
$ P_35A39_M : chr [1:195662] "4331530" "12356" "6532" "47774" ...
$ P_40A44 : chr [1:195662] "8503586" "17934" "9705" "92378" ...
$ P_40A44_F : chr [1:195662] "4441282" "6060" "3743" "48539" ...
$ P_40A44_M : chr [1:195662] "4062304" "11874" "5962" "43839" ...
$ P_45A49 : chr [1:195662] "7942413" "16840" "8668" "84669" ...
$ P_45A49_F : chr [1:195662] "4130069" "5940" "3377" "44506" ...
$ P_45A49_M : chr [1:195662] "3812344" "10900" "5291" "40163" ...
$ P_50A54 : chr [1:195662] "7037532" "15070" "7878" "74121" ...
$ P_50A54_F : chr [1:195662] "3705369" "5481" "3239" "39510" ...
$ P_50A54_M : chr [1:195662] "3332163" "9589" "4639" "34611" ...
$ P_55A59 : chr [1:195662] "5695958" "13070" "6838" "58865" ...
$ P_55A59_F : chr [1:195662] "3002982" "4728" "2823" "31257" ...
$ P_55A59_M : chr [1:195662] "2692976" "8342" "4015" "27608" ...
$ P_60A64 : chr [1:195662] "4821062" "11181" "5997" "48306" ...
$ P_60A64_F : chr [1:195662] "2563200" "4050" "2511" "25871" ...
$ P_60A64_M : chr [1:195662] "2257862" "7131" "3486" "22435" ...
$ P_65A69 : chr [1:195662] "3645077" "9160" "5052" "35823" ...
$ P_65A69_F : chr [1:195662] "1938227" "3343" "2130" "19125" ...
$ P_65A69_M : chr [1:195662] "1706850" "5817" "2922" "16698" ...
$ P_70A74 : chr [1:195662] "2647340" "6903" "3852" "25586" ...
[list output truncated]
- attr(*, "spec")=
.. cols(
.. ENTIDAD = col_character(),
.. NOM_ENT = col_character(),
.. MUN = col_character(),
.. NOM_MUN = col_character(),
.. LOC = col_character(),
.. NOM_LOC = col_character(),
.. LONGITUD = col_character(),
.. LATITUD = col_character(),
.. ALTITUD = col_character(),
.. POBTOT = col_double(),
.. POBFEM = col_character(),
.. POBMAS = col_character(),
.. P_0A2 = col_character(),
.. P_0A2_F = col_character(),
.. P_0A2_M = col_character(),
.. P_3YMAS = col_character(),
.. P_3YMAS_F = col_character(),
.. P_3YMAS_M = col_character(),
.. P_5YMAS = col_character(),
.. P_5YMAS_F = col_character(),
.. P_5YMAS_M = col_character(),
.. P_12YMAS = col_character(),
.. P_12YMAS_F = col_character(),
.. P_12YMAS_M = col_character(),
.. P_15YMAS = col_character(),
.. P_15YMAS_F = col_character(),
.. P_15YMAS_M = col_character(),
.. P_18YMAS = col_character(),
.. P_18YMAS_F = col_character(),
.. P_18YMAS_M = col_character(),
.. P_3A5 = col_character(),
.. P_3A5_F = col_character(),
.. P_3A5_M = col_character(),
.. P_6A11 = col_character(),
.. P_6A11_F = col_character(),
.. P_6A11_M = col_character(),
.. P_8A14 = col_character(),
.. P_8A14_F = col_character(),
.. P_8A14_M = col_character(),
.. P_12A14 = col_character(),
.. P_12A14_F = col_character(),
.. P_12A14_M = col_character(),
.. P_15A17 = col_character(),
.. P_15A17_F = col_character(),
.. P_15A17_M = col_character(),
.. P_18A24 = col_character(),
.. P_18A24_F = col_character(),
.. P_18A24_M = col_character(),
.. P_15A49_F = col_character(),
.. P_60YMAS = col_character(),
.. P_60YMAS_F = col_character(),
.. P_60YMAS_M = col_character(),
.. REL_H_M = col_character(),
.. POB0_14 = col_character(),
.. POB15_64 = col_character(),
.. POB65_MAS = col_character(),
.. P_0A4 = col_character(),
.. P_0A4_F = col_character(),
.. P_0A4_M = col_character(),
.. P_5A9 = col_character(),
.. P_5A9_F = col_character(),
.. P_5A9_M = col_character(),
.. P_10A14 = col_character(),
.. P_10A14_F = col_character(),
.. P_10A14_M = col_character(),
.. P_15A19 = col_character(),
.. P_15A19_F = col_character(),
.. P_15A19_M = col_character(),
.. P_20A24 = col_character(),
.. P_20A24_F = col_character(),
.. P_20A24_M = col_character(),
.. P_25A29 = col_character(),
.. P_25A29_F = col_character(),
.. P_25A29_M = col_character(),
.. P_30A34 = col_character(),
.. P_30A34_F = col_character(),
.. P_30A34_M = col_character(),
.. P_35A39 = col_character(),
.. P_35A39_F = col_character(),
.. P_35A39_M = col_character(),
.. P_40A44 = col_character(),
.. P_40A44_F = col_character(),
.. P_40A44_M = col_character(),
.. P_45A49 = col_character(),
.. P_45A49_F = col_character(),
.. P_45A49_M = col_character(),
.. P_50A54 = col_character(),
.. P_50A54_F = col_character(),
.. P_50A54_M = col_character(),
.. P_55A59 = col_character(),
.. P_55A59_F = col_character(),
.. P_55A59_M = col_character(),
.. P_60A64 = col_character(),
.. P_60A64_F = col_character(),
.. P_60A64_M = col_character(),
.. P_65A69 = col_character(),
.. P_65A69_F = col_character(),
.. P_65A69_M = col_character(),
.. P_70A74 = col_character(),
.. P_70A74_F = col_character(),
.. P_70A74_M = col_character(),
.. P_75A79 = col_character(),
.. P_75A79_F = col_character(),
.. P_75A79_M = col_character(),
.. P_80A84 = col_character(),
.. P_80A84_F = col_character(),
.. P_80A84_M = col_character(),
.. P_85YMAS = col_character(),
.. P_85YMAS_F = col_character(),
.. P_85YMAS_M = col_character(),
.. PROM_HNV = col_character(),
.. PNACENT = col_character(),
.. PNACENT_F = col_character(),
.. PNACENT_M = col_character(),
.. PNACOE = col_character(),
.. PNACOE_F = col_character(),
.. PNACOE_M = col_character(),
.. PRES2015 = col_character(),
.. PRES2015_F = col_character(),
.. PRES2015_M = col_character(),
.. PRESOE15 = col_character(),
.. PRESOE15_F = col_character(),
.. PRESOE15_M = col_character(),
.. P3YM_HLI = col_character(),
.. P3YM_HLI_F = col_character(),
.. P3YM_HLI_M = col_character(),
.. P3HLINHE = col_character(),
.. P3HLINHE_F = col_character(),
.. P3HLINHE_M = col_character(),
.. P3HLI_HE = col_character(),
.. P3HLI_HE_F = col_character(),
.. P3HLI_HE_M = col_character(),
.. P5_HLI = col_character(),
.. P5_HLI_NHE = col_character(),
.. P5_HLI_HE = col_character(),
.. PHOG_IND = col_character(),
.. POB_AFRO = col_character(),
.. POB_AFRO_F = col_character(),
.. POB_AFRO_M = col_character(),
.. PCON_DISC = col_character(),
.. PCDISC_MOT = col_character(),
.. PCDISC_VIS = col_character(),
.. PCDISC_LENG = col_character(),
.. PCDISC_AUD = col_character(),
.. PCDISC_MOT2 = col_character(),
.. PCDISC_MEN = col_character(),
.. PCON_LIMI = col_character(),
.. PCLIM_CSB = col_character(),
.. PCLIM_VIS = col_character(),
.. PCLIM_HACO = col_character(),
.. PCLIM_OAUD = col_character(),
.. PCLIM_MOT2 = col_character(),
.. PCLIM_RE_CO = col_character(),
.. PCLIM_PMEN = col_character(),
.. PSIND_LIM = col_character(),
.. P3A5_NOA = col_character(),
.. P3A5_NOA_F = col_character(),
.. P3A5_NOA_M = col_character(),
.. P6A11_NOA = col_character(),
.. P6A11_NOAF = col_character(),
.. P6A11_NOAM = col_character(),
.. P12A14NOA = col_character(),
.. P12A14NOAF = col_character(),
.. P12A14NOAM = col_character(),
.. P15A17A = col_character(),
.. P15A17A_F = col_character(),
.. P15A17A_M = col_character(),
.. P18A24A = col_character(),
.. P18A24A_F = col_character(),
.. P18A24A_M = col_character(),
.. P8A14AN = col_character(),
.. P8A14AN_F = col_character(),
.. P8A14AN_M = col_character(),
.. P15YM_AN = col_character(),
.. P15YM_AN_F = col_character(),
.. P15YM_AN_M = col_character(),
.. P15YM_SE = col_character(),
.. P15YM_SE_F = col_character(),
.. P15YM_SE_M = col_character(),
.. P15PRI_IN = col_character(),
.. P15PRI_INF = col_character(),
.. P15PRI_INM = col_character(),
.. P15PRI_CO = col_character(),
.. P15PRI_COF = col_character(),
.. P15PRI_COM = col_character(),
.. P15SEC_IN = col_character(),
.. P15SEC_INF = col_character(),
.. P15SEC_INM = col_character(),
.. P15SEC_CO = col_character(),
.. P15SEC_COF = col_character(),
.. P15SEC_COM = col_character(),
.. P18YM_PB = col_character(),
.. P18YM_PB_F = col_character(),
.. P18YM_PB_M = col_character(),
.. GRAPROES = col_character(),
.. GRAPROES_F = col_character(),
.. GRAPROES_M = col_character(),
.. PEA = col_character(),
.. PEA_F = col_character(),
.. PEA_M = col_character(),
.. PE_INAC = col_character(),
.. PE_INAC_F = col_character(),
.. PE_INAC_M = col_character(),
.. POCUPADA = col_character(),
.. POCUPADA_F = col_character(),
.. POCUPADA_M = col_character(),
.. PDESOCUP = col_character(),
.. PDESOCUP_F = col_character(),
.. PDESOCUP_M = col_character(),
.. PSINDER = col_character(),
.. PDER_SS = col_character(),
.. PDER_IMSS = col_character(),
.. PDER_ISTE = col_character(),
.. PDER_ISTEE = col_character(),
.. PAFIL_PDOM = col_character(),
.. PDER_SEGP = col_character(),
.. PDER_IMSSB = col_character(),
.. PAFIL_IPRIV = col_character(),
.. PAFIL_OTRAI = col_character(),
.. P12YM_SOLT = col_character(),
.. P12YM_CASA = col_character(),
.. P12YM_SEPA = col_character(),
.. PCATOLICA = col_character(),
.. PRO_CRIEVA = col_character(),
.. POTRAS_REL = col_character(),
.. PSIN_RELIG = col_character(),
.. TOTHOG = col_character(),
.. HOGJEF_F = col_character(),
.. HOGJEF_M = col_character(),
.. POBHOG = col_character(),
.. PHOGJEF_F = col_character(),
.. PHOGJEF_M = col_character(),
.. VIVTOT = col_double(),
.. TVIVHAB = col_double(),
.. TVIVPAR = col_character(),
.. VIVPAR_HAB = col_character(),
.. VIVPARH_CV = col_character(),
.. TVIVPARHAB = col_character(),
.. VIVPAR_DES = col_character(),
.. VIVPAR_UT = col_character(),
.. OCUPVIVPAR = col_character(),
.. PROM_OCUP = col_character(),
.. PRO_OCUP_C = col_character(),
.. VPH_PISODT = col_character(),
.. VPH_PISOTI = col_character(),
.. VPH_1DOR = col_character(),
.. VPH_2YMASD = col_character(),
.. VPH_1CUART = col_character(),
.. VPH_2CUART = col_character(),
.. VPH_3YMASC = col_character(),
.. VPH_C_ELEC = col_character(),
.. VPH_S_ELEC = col_character(),
.. VPH_AGUADV = col_character(),
.. VPH_AEASP = col_character(),
.. VPH_AGUAFV = col_character(),
.. VPH_TINACO = col_character(),
.. VPH_CISTER = col_character(),
.. VPH_EXCSA = col_character(),
.. VPH_LETR = col_character(),
.. VPH_DRENAJ = col_character(),
.. VPH_NODREN = col_character(),
.. VPH_C_SERV = col_character(),
.. VPH_NDEAED = col_character(),
.. VPH_DSADMA = col_character(),
.. VPH_NDACMM = col_character(),
.. VPH_SNBIEN = col_character(),
.. VPH_REFRI = col_character(),
.. VPH_LAVAD = col_character(),
.. VPH_HMICRO = col_character(),
.. VPH_AUTOM = col_character(),
.. VPH_MOTO = col_character(),
.. VPH_BICI = col_character(),
.. VPH_RADIO = col_character(),
.. VPH_TV = col_character(),
.. VPH_PC = col_character(),
.. VPH_TELEF = col_character(),
.. VPH_CEL = col_character(),
.. VPH_INTER = col_character(),
.. VPH_STVP = col_character(),
.. VPH_SPMVPI = col_character(),
.. VPH_CVJ = col_character(),
.. VPH_SINRTV = col_character(),
.. VPH_SINLTC = col_character(),
.. VPH_SINCINT = col_character(),
.. VPH_SINTIC = col_character(),
.. TAMLOC = col_character()
.. )
- attr(*, "problems")=<externalptr>
Exporting as parquet
# Persist the selected columns as a parquet dataset partitioned by the
# NOM_ENT and ENTIDAD columns.
table <- arrow::Table$create(selected_df)
output_dir <- here("data", "processed", "parquet_data")
arrow::write_dataset(
  dataset = table,
  path = output_dir,
  partitioning = c("NOM_ENT", "ENTIDAD"),
  existing_data_behavior = "overwrite"
)
Reading parquet
# Read the whole partitioned parquet dataset back into memory and print it.
ds <- collect(
  open_dataset(here("data", "processed", "parquet_data"))
)

ds
Puebla
# Load only the rows whose NOM_ENT equals "Puebla", then print them.
ds_puebla <- collect(
  filter(
    open_dataset(here("data", "processed", "parquet_data")),
    NOM_ENT == "Puebla"
  )
)

ds_puebla
Yucatán
# Load only the rows whose NOM_ENT equals "Yucatán", then print them.
ds_yucatan <- collect(
  filter(
    open_dataset(here("data", "processed", "parquet_data")),
    NOM_ENT == "Yucatán"
  )
)

ds_yucatan
Nuevo León
# Load only the rows whose NOM_ENT equals "Nuevo León", then print them.
ds_nuevo_leon <- collect(
  filter(
    open_dataset(here("data", "processed", "parquet_data")),
    NOM_ENT == "Nuevo León"
  )
)

ds_nuevo_leon
Total Nacional
# Load only the rows whose NOM_ENT equals "Total nacional", then print them.
ds_nacional <- collect(
  filter(
    open_dataset(here("data", "processed", "parquet_data")),
    NOM_ENT == "Total nacional"
  )
)

ds_nacional
Verify datasets are not empty
# Sanity check: every entity name in `entities_csv` must match at least one
# row of the parquet dataset written earlier.
#
# The dataset handle does not depend on the loop variable, so it is opened
# once here instead of once per entity; each iteration only applies a filter.
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))

for (value in entities_csv) {
  read_dfs <- parquet_ds |>
    filter(NOM_ENT == value) |>
    collect()

  # Count once; the number is used both in the test and in the report line.
  n_rows <- nrow(read_dfs)

  if (n_rows == 0) {
    print(paste("Dataset is empty", value))
  } else {
    print(paste("OK", value, n_rows))
  }
}
[1] "OK Total nacional 3"
[1] "OK Aguascalientes 2058"
[1] "OK Baja California 5566"
[1] "OK Baja California Sur 2561"
[1] "OK Campeche 2800"
[1] "OK Coahuila de Zaragoza 4149"
[1] "OK Colima 1259"
[1] "OK Chiapas 21487"
[1] "OK Chihuahua 12389"
[1] "OK Ciudad de México 666"
[1] "OK Durango 6006"
[1] "OK Guanajuato 8945"
[1] "OK Guerrero 7001"
[1] "OK Hidalgo 4916"
[1] "OK Jalisco 10715"
[1] "OK México 5136"
[1] "OK Michoacán de Ocampo 8956"
[1] "OK Morelos 1678"
[1] "OK Nayarit 2913"
[1] "OK Nuevo León 4974"
[1] "OK Oaxaca 11856"
[1] "OK Puebla 7059"
[1] "OK Querétaro 2249"
[1] "OK Quintana Roo 2243"
[1] "OK San Luis Potosí 6729"
[1] "OK Sinaloa 5552"
[1] "OK Sonora 7500"
[1] "OK Tabasco 2517"
[1] "OK Tamaulipas 6695"
[1] "OK Tlaxcala 1323"
[1] "OK Veracruz de Ignacio de la Llave 20401"
[1] "OK Yucatán 2691"
[1] "OK Zacatecas 4669"
Coordinate Lab
# Pull the raw coordinate columns out of the selected data.
longitudes <- selected_df[["LONGITUD"]]
latitudes <- selected_df[["LATITUD"]]

# Pick one raw value to prototype the parser on. Despite its name,
# `test_lat` holds a longitude (the 8th element of `longitudes`).
test_lat <- longitudes[8]
test_lat
[1] "102°17'45.768\" W"
# Prototype of the degrees/minutes/seconds -> decimal degrees conversion.
# Splitting on the degree sign, apostrophe, double quote and space leaves
# degrees, minutes and seconds as the first three pieces.
sections <- strsplit(test_lat, "[°'\" ]")[[1]]

degrees <- as.numeric(sections[1])
minutes <- as.numeric(sections[2])
seconds <- as.numeric(sections[3])

# The result is negated unconditionally; the hemisphere letter is not read.
decimal_degrees <- -(degrees + minutes / 60 + seconds / 3600)
decimal_degrees
[1] -102.296
# Convert longitudes written as degrees/minutes/seconds text
# (e.g. "102°17'45.768\" W") to decimal degrees.
#
# Arguments:
#   test_long  Character vector of any length (a single string works too).
#              NA elements are allowed.
#
# Returns:
#   A double vector the same length as `test_long`. NA input yields
#   NA_real_, so the result is always numeric, even when every element is
#   missing. The value is returned visibly.
#
# Note: the sign is always negative; the trailing hemisphere letter is not
# inspected. This matches the original behaviour of this function.
longitude_to_decimal <- function(test_long) {
  # as.character() lets a bare logical NA through strsplit(), which only
  # accepts character input.
  parts <- strsplit(as.character(test_long), "[°'\" ]")

  vapply(
    parts,
    function(sections) {
      # Missing or too-short input gives NA pieces, which propagate to an
      # NA result without a separate is.na() branch.
      degrees <- as.numeric(sections[1])
      minutes <- as.numeric(sections[2])
      seconds <- as.numeric(sections[3])

      -(degrees + minutes / 60 + seconds / 3600)
    },
    numeric(1)
  )
}
# Convert latitudes written as degrees/minutes/seconds text
# (e.g. "21°52'47.362\" N") to decimal degrees.
#
# Arguments:
#   test_lat  Character vector of any length (a single string works too).
#             NA elements are allowed.
#
# Returns:
#   A double vector the same length as `test_lat`. NA input yields
#   NA_real_, so the result is always numeric, even when every element is
#   missing. The value is returned visibly.
#
# Note: the sign is always positive; the trailing hemisphere letter is not
# inspected. This matches the original behaviour of this function.
latitude_to_decimal <- function(test_lat) {
  # as.character() lets a bare logical NA through strsplit(), which only
  # accepts character input.
  parts <- strsplit(as.character(test_lat), "[°'\" ]")

  vapply(
    parts,
    function(sections) {
      # Missing or too-short input gives NA pieces, which propagate to an
      # NA result without a separate is.na() branch.
      degrees <- as.numeric(sections[1])
      minutes <- as.numeric(sections[2])
      seconds <- as.numeric(sections[3])

      degrees + minutes / 60 + seconds / 3600
    },
    numeric(1)
  )
}
Exporting clean
# Persist the data with the added decimal coordinates as a second parquet
# dataset, partitioned by the NOM_ENT and ENTIDAD columns.
table <- arrow::Table$create(selected_clean)
output_dir <- here("data", "processed", "parquet_data_coords")
arrow::write_dataset(
  dataset = table,
  path = output_dir,
  partitioning = c("NOM_ENT", "ENTIDAD"),
  existing_data_behavior = "overwrite"
)
Verify that localities are unique within each state
# For each entity, check whether the locality name (NOM_LOC) alone
# identifies a row: the row count must equal the number of distinct names.
#
# The dataset handle does not depend on the loop variable, so it is opened
# once here instead of once per entity.
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))

for (value in entities_csv) {
  read_dfs <- parquet_ds |>
    filter(NOM_ENT == value) |>
    collect()

  # Compute both counts once; they feed the comparison and the report line.
  n_rows <- nrow(read_dfs)
  n_unique <- length(unique(read_dfs$NOM_LOC))

  if (n_rows == n_unique) {
    print(paste("Localities Unique", value))
  } else {
    print(paste("NOT OK", value, n_rows, "<>", n_unique))
  }
}
[1] "Localities Unique Total nacional"
[1] "NOT OK Aguascalientes 2058 <> 1772"
[1] "NOT OK Baja California 5566 <> 4621"
[1] "NOT OK Baja California Sur 2561 <> 1832"
[1] "NOT OK Campeche 2800 <> 1894"
[1] "NOT OK Coahuila de Zaragoza 4149 <> 3287"
[1] "NOT OK Colima 1259 <> 1035"
[1] "NOT OK Chiapas 21487 <> 10349"
[1] "NOT OK Chihuahua 12389 <> 8082"
[1] "NOT OK Ciudad de México 666 <> 617"
[1] "NOT OK Durango 6006 <> 4444"
[1] "NOT OK Guanajuato 8945 <> 6923"
[1] "NOT OK Guerrero 7001 <> 5189"
[1] "NOT OK Hidalgo 4916 <> 3690"
[1] "NOT OK Jalisco 10715 <> 6764"
[1] "NOT OK México 5136 <> 4291"
[1] "NOT OK Michoacán de Ocampo 8956 <> 6065"
[1] "NOT OK Morelos 1678 <> 1471"
[1] "NOT OK Nayarit 2913 <> 2243"
[1] "NOT OK Nuevo León 4974 <> 3328"
[1] "NOT OK Oaxaca 11856 <> 7924"
[1] "NOT OK Puebla 7059 <> 5037"
[1] "NOT OK Querétaro 2249 <> 1885"
[1] "NOT OK Quintana Roo 2243 <> 1832"
[1] "NOT OK San Luis Potosí 6729 <> 5037"
[1] "NOT OK Sinaloa 5552 <> 4064"
[1] "NOT OK Sonora 7500 <> 5710"
[1] "NOT OK Tabasco 2517 <> 2019"
[1] "NOT OK Tamaulipas 6695 <> 4601"
[1] "NOT OK Tlaxcala 1323 <> 1075"
[1] "NOT OK Veracruz de Ignacio de la Llave 20401 <> 12141"
[1] "NOT OK Yucatán 2691 <> 1790"
[1] "NOT OK Zacatecas 4669 <> 3594"
# For each entity, check whether municipality name + locality name
# (NOM_MUN, NOM_LOC) identifies a row: the row count must equal the number
# of distinct combined keys.
#
# The dataset handle does not depend on the loop variable, so it is opened
# once here instead of once per entity.
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))

for (value in entities_csv) {
  read_dfs <- parquet_ds |>
    filter(NOM_ENT == value) |>
    collect()

  # Candidate key: the two names joined with an underscore.
  read_dfs$NOM_MUN_LOC <- paste(read_dfs$NOM_MUN, read_dfs$NOM_LOC, sep = "_")

  # Compute both counts once; they feed the comparison and the report line.
  n_rows <- nrow(read_dfs)
  n_unique <- length(unique(read_dfs$NOM_MUN_LOC))

  if (n_rows == n_unique) {
    print(paste("Localities Unique", value))
  } else {
    print(paste("NOT OK", value, n_rows, "<>", n_unique))
  }
}
[1] "Localities Unique Total nacional"
[1] "NOT OK Aguascalientes 2058 <> 1979"
[1] "NOT OK Baja California 5566 <> 5074"
[1] "NOT OK Baja California Sur 2561 <> 2212"
[1] "NOT OK Campeche 2800 <> 2368"
[1] "NOT OK Coahuila de Zaragoza 4149 <> 4023"
[1] "NOT OK Colima 1259 <> 1177"
[1] "NOT OK Chiapas 21487 <> 18268"
[1] "NOT OK Chihuahua 12389 <> 11167"
[1] "NOT OK Ciudad de México 666 <> 663"
[1] "NOT OK Durango 6006 <> 5578"
[1] "NOT OK Guanajuato 8945 <> 8753"
[1] "NOT OK Guerrero 7001 <> 6870"
[1] "NOT OK Hidalgo 4916 <> 4870"
[1] "NOT OK Jalisco 10715 <> 10393"
[1] "NOT OK México 5136 <> 5108"
[1] "NOT OK Michoacán de Ocampo 8956 <> 8656"
[1] "NOT OK Morelos 1678 <> 1662"
[1] "NOT OK Nayarit 2913 <> 2726"
[1] "NOT OK Nuevo León 4974 <> 4641"
[1] "NOT OK Oaxaca 11856 <> 11760"
[1] "NOT OK Puebla 7059 <> 6837"
[1] "NOT OK Querétaro 2249 <> 2222"
[1] "NOT OK Quintana Roo 2243 <> 2138"
[1] "NOT OK San Luis Potosí 6729 <> 6590"
[1] "NOT OK Sinaloa 5552 <> 5141"
[1] "NOT OK Sonora 7500 <> 7141"
[1] "NOT OK Tabasco 2517 <> 2399"
[1] "NOT OK Tamaulipas 6695 <> 6265"
[1] "NOT OK Tlaxcala 1323 <> 1315"
[1] "NOT OK Veracruz de Ignacio de la Llave 20401 <> 19225"
[1] "NOT OK Yucatán 2691 <> 2558"
[1] "NOT OK Zacatecas 4669 <> 4610"
# For each entity, check whether locality code + locality name
# (LOC, NOM_LOC) identifies a row: the row count must equal the number of
# distinct combined keys.
#
# The dataset handle does not depend on the loop variable, so it is opened
# once here instead of once per entity.
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))

for (value in entities_csv) {
  read_dfs <- parquet_ds |>
    filter(NOM_ENT == value) |>
    collect()

  # Candidate key: code and name joined with an underscore.
  read_dfs$NOM_LOC_LOC <- paste(read_dfs$LOC, read_dfs$NOM_LOC, sep = "_")

  # Compute both counts once; they feed the comparison and the report line.
  n_rows <- nrow(read_dfs)
  n_unique <- length(unique(read_dfs$NOM_LOC_LOC))

  if (n_rows == n_unique) {
    print(paste("Localities Unique", value))
  } else {
    print(paste("NOT OK", value, n_rows, "<>", n_unique))
  }
}
[1] "Localities Unique Total nacional"
[1] "NOT OK Aguascalientes 2058 <> 2026"
[1] "NOT OK Baja California 5566 <> 5548"
[1] "NOT OK Baja California Sur 2561 <> 2547"
[1] "NOT OK Campeche 2800 <> 2765"
[1] "NOT OK Coahuila de Zaragoza 4149 <> 4031"
[1] "NOT OK Colima 1259 <> 1230"
[1] "NOT OK Chiapas 21487 <> 20987"
[1] "NOT OK Chihuahua 12389 <> 12161"
[1] "NOT OK Ciudad de México 666 <> 638"
[1] "NOT OK Durango 6006 <> 5885"
[1] "NOT OK Guanajuato 8945 <> 8792"
[1] "NOT OK Guerrero 7001 <> 6740"
[1] "NOT OK Hidalgo 4916 <> 4655"
[1] "NOT OK Jalisco 10715 <> 10277"
[1] "NOT OK México 5136 <> 4878"
[1] "NOT OK Michoacán de Ocampo 8956 <> 8602"
[1] "NOT OK Morelos 1678 <> 1581"
[1] "NOT OK Nayarit 2913 <> 2852"
[1] "NOT OK Nuevo León 4974 <> 4819"
[1] "NOT OK Oaxaca 11856 <> 10515"
[1] "NOT OK Puebla 7059 <> 6511"
[1] "NOT OK Querétaro 2249 <> 2191"
[1] "NOT OK Quintana Roo 2243 <> 2211"
[1] "NOT OK San Luis Potosí 6729 <> 6519"
[1] "NOT OK Sinaloa 5552 <> 5499"
[1] "NOT OK Sonora 7500 <> 7293"
[1] "NOT OK Tabasco 2517 <> 2474"
[1] "NOT OK Tamaulipas 6695 <> 6551"
[1] "NOT OK Tlaxcala 1323 <> 1179"
[1] "NOT OK Veracruz de Ignacio de la Llave 20401 <> 19640"
[1] "NOT OK Yucatán 2691 <> 2430"
[1] "NOT OK Zacatecas 4669 <> 4488"
# For each entity, check whether locality code + municipality code
# (LOC, MUN) identifies a row: the row count must equal the number of
# distinct combined keys.
#
# The dataset handle does not depend on the loop variable, so it is opened
# once here instead of once per entity.
parquet_ds <- open_dataset(here("data", "processed", "parquet_data"))

for (value in entities_csv) {
  read_dfs <- parquet_ds |>
    filter(NOM_ENT == value) |>
    collect()

  # Candidate key: the two codes joined with an underscore.
  read_dfs$LOC_MUN <- paste(read_dfs$LOC, read_dfs$MUN, sep = "_")

  # Compute both counts once; they feed the comparison and the report line.
  n_rows <- nrow(read_dfs)
  n_unique <- length(unique(read_dfs$LOC_MUN))

  if (n_rows == n_unique) {
    print(paste("Localities Unique", value))
  } else {
    print(paste("NOT OK", value, n_rows, "<>", n_unique))
  }
}
[1] "Localities Unique Total nacional"
[1] "Localities Unique Aguascalientes"
[1] "Localities Unique Baja California"
[1] "Localities Unique Baja California Sur"
[1] "Localities Unique Campeche"
[1] "Localities Unique Coahuila de Zaragoza"
[1] "Localities Unique Colima"
[1] "Localities Unique Chiapas"
[1] "Localities Unique Chihuahua"
[1] "Localities Unique Ciudad de México"
[1] "Localities Unique Durango"
[1] "Localities Unique Guanajuato"
[1] "Localities Unique Guerrero"
[1] "Localities Unique Hidalgo"
[1] "Localities Unique Jalisco"
[1] "Localities Unique México"
[1] "Localities Unique Michoacán de Ocampo"
[1] "Localities Unique Morelos"
[1] "Localities Unique Nayarit"
[1] "Localities Unique Nuevo León"
[1] "Localities Unique Oaxaca"
[1] "Localities Unique Puebla"
[1] "Localities Unique Querétaro"
[1] "Localities Unique Quintana Roo"
[1] "Localities Unique San Luis Potosí"
[1] "Localities Unique Sinaloa"
[1] "Localities Unique Sonora"
[1] "Localities Unique Tabasco"
[1] "Localities Unique Tamaulipas"
[1] "Localities Unique Tlaxcala"
[1] "Localities Unique Veracruz de Ignacio de la Llave"
[1] "Localities Unique Yucatán"
[1] "Localities Unique Zacatecas"
Conclusion
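Combine the MUN and LOC codes to identify a locality: of the keys tested above, only the LOC + MUN pair is unique within every state.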
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCmF1dGhvcjogTWFyY28gUG9sbyBCcmF2byBNb250aWVsDQpkYXRlOiAyMDIwLTA0LTIxDQotLS0NCg0KIyMjIExpYnJhcmllcw0KDQpgYGB7cn0NCiMgaWYgKCFyZXF1aXJlKCJyZW52IikpIGluc3RhbGwucGFja2FnZXMoInJlbnYiKQ0KIyBsaWJyYXJ5KHJlbnYpDQojIHJlbnY6OnJlc3RvcmUoKQ0KbGlicmFyeShoZXJlKQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkocmVhZHIpDQpsaWJyYXJ5KGFycm93KQ0KYGBgDQoNCiMjIyBSZWFkIGRhdGENCg0KYGBge3J9DQp6aXBfZmlsZSA8LSBoZXJlKCJkYXRhIiwgInJhdyIsICJpdGVyXzAwX2NwdjIwMjBfY3N2LnppcCIpDQpgYGANCg0KYGBge3J9DQp0ZW1wX2RpciA8LSBoZXJlKCJ0ZW1wIikNCmRpci5jcmVhdGUodGVtcF9kaXIsIHNob3dXYXJuaW5ncyA9IEZBTFNFKQ0KDQp1bnppcCh6aXBfZmlsZSwgZmlsZXMgPSBjKCJpdGVyXzAwX2NwdjIwMjAvY29uanVudG9fZGVfZGF0b3MvY29uanVudG9fZGVfZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIsICJpdGVyXzAwX2NwdjIwMjAvZGljY2lvbmFyaW9fZGF0b3MvZGljY2lvbmFyaW9fZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIpLCBleGRpciA9IHRlbXBfZGlyKQ0KYGBgDQoNCmBgYHtyfQ0KDQpkYXRhX3BhdGggPC0gaGVyZSh0ZW1wX2RpciwNCiAgICAgICAgICAgICAgICAgIml0ZXJfMDBfY3B2MjAyMCIsDQogICAgICAgICAgICAgICAgICJjb25qdW50b19kZV9kYXRvcyIsDQogICAgICAgICAgICAgICAgICJjb25qdW50b19kZV9kYXRvc19pdGVyXzAwQ1NWMjAuY3N2IikNCg0KZGljdF9wYXRoIDwtIGhlcmUodGVtcF9kaXIsDQogICAgICAgICAgICAgICAgICJpdGVyXzAwX2NwdjIwMjAiLA0KICAgICAgICAgICAgICAgICAiZGljY2lvbmFyaW9fZGF0b3MiLA0KICAgICAgICAgICAgICAgICAiZGljY2lvbmFyaW9fZGF0b3NfaXRlcl8wMENTVjIwLmNzdiIpDQoNCmluZm9fZGljdCA8LSByZWFkX2NzdihkaWN0X3BhdGgpDQpkZiA8LSByZWFkX2NzdihkYXRhX3BhdGgpDQoNCg0KdW5saW5rKHRlbXBfZGlyLCByZWN1cnNpdmUgPSBUUlVFKQ0KYGBgDQoNCmBgYHtyfQ0KIyBFeHBvcnRpbmcgZGljdGlvbmFyeSBmaWxlDQp3cml0ZV9jc3YoaW5mb19kaWN0LA0KICAgICAgICAgIGhlcmUoImRhdGEiLCAicmF3IiwgImRpY2Npb25hcmlvX2RhdG9zX2l0ZXJfMDBDU1YyMC5jc3YiKSkNCg0KYGBgDQoNCiMjIyBFeHBsb3JhdGlvbg0KDQpgYGB7cn0NCmhlYWQoZGYpDQpoZWFkKGluZm9fZGljdCkNCmBgYA0KDQpgYGB7cn0NCnN0cihkZikNCmBgYA0KDQpgYGB7cn0NCmNsZWFuX2luZm9fZGljdCA8LSBpbmZvX2RpY3RbLWMoMTozKSwgXQ0KbmFtZXMoY2xlYW5faW5mb19kaWN0KSA8LSBjbGVhbl9pbmZvX2RpY3RbMSwgXQ0KY2xlYW5faW5mb19kaWN0IDwtIGNsZWFuX2luZm9fZGljdFstMSxdDQpjbGVhbl9pbmZvX2RpY3QgPC0gY2xlYW5faW5mb19kaWN0Wywg
LWMoNzoxMCldDQoNCg0KY2xlYW5faW5mb19kaWN0DQpgYGANCg0KYGBge3J9DQp1bmlxdWVfc3RhdGVzIDwtIGRmIHw+IA0KICAgICAgZGlzdGluY3QoTk9NX0VOVCkNCg0Kd3JpdGVfY3N2KHVuaXF1ZV9zdGF0ZXMsIGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgImVudGl0eV9uYW1lcy5jc3YiKSkNCg0KDQp1bmlxdWVfc3RhdGVzDQpgYGANCg0KYGBge3J9DQplbnRpdGllc19jc3YgPC0gcmVhZF9jc3YoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAiZW50aXR5X25hbWVzLmNzdiIpKSB8PiBwdWxsKCkNCg0KcHJpbnQoZW50aXRpZXNfY3N2KQ0KYGBgDQoNCiMjIyBTZWxlY3Rpbmcgcm93cyB0aGF0IHdlJ2xsIGFuYWx5emUNCg0KYGBge3J9DQpyb3dzX3RvX2luY2x1ZGUgPC0gYygxOjEyLCA1MzoxMzIsIDEzNjoxNDAsIDE0NywgMTU1OjIxMSwgMjIwOjIzMikNCg0KZmlsdGVyZWRfZGF0YSA8LSBjbGVhbl9pbmZvX2RpY3QgfD4gDQogICAgICBmaWx0ZXIocm93X251bWJlcigpICVpbiUgcm93c190b19pbmNsdWRlKSB8PiANCiAgICAgIHB1bGwoNCkNCg0KZmlsdGVyZWRfZGF0YQ0KYGBgDQoNCmBgYHtyfQ0Kc2VsZWN0ZWRfZGYgPC0gZGYgfD4gDQogICAgICBzZWxlY3QoZmlsdGVyZWRfZGF0YSkNCg0Kc2VsZWN0ZWRfZGYNCmBgYA0KDQojIyMgRURBIGJlZm9yZSBleHBvcnRpbmcNCg0KYGBge3J9DQpzdHIoZGYpDQpgYGANCg0KIyMjIEV4cG9ydGluZyBhcyBwYXJxdWV0DQoNCmBgYHtyfQ0KIyBFeHBvcnQgd3JhbmdsZWQgZGF0YSBhcyBwYXJxdWV0IGZpbGUNCnRhYmxlIDwtIGFycm93OjpUYWJsZSRjcmVhdGUoc2VsZWN0ZWRfZGYpDQoNCm91dHB1dF9kaXIgPC0gaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikNCg0KYXJyb3c6OndyaXRlX2RhdGFzZXQodGFibGUsIG91dHB1dF9kaXIsIHBhcnRpdGlvbmluZyA9IGMoIk5PTV9FTlQiLCAiRU5USURBRCIpLCBleGlzdGluZ19kYXRhX2JlaGF2aW9yID0gIm92ZXJ3cml0ZSIpDQpgYGANCg0KIyMjIFJlYWRpbmcgcGFycXVldA0KDQpgYGB7cn0NCmRzIDwtIG9wZW5fZGF0YXNldChoZXJlKCJkYXRhIiwgInByb2Nlc3NlZCIsICJwYXJxdWV0X2RhdGEiKSkgfD4gDQogICAgICAgIGNvbGxlY3QoKQ0KDQpkcw0KYGBgDQoNCiMjIyBQdWVibGENCg0KYGBge3J9DQpkc19wdWVibGEgPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT0iUHVlYmxhIikgfD4gDQogICAgY29sbGVjdCgpDQoNCmRzX3B1ZWJsYQ0KYGBgDQoNCiMjIyBZdWNhdMOhbg0KDQpgYGB7cn0NCmRzX3l1Y2F0YW4gPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT0iWXVjYXTDoW4iKSB8PiANCiAgICBjb2xsZWN0KCkNCg0KZHNfeXVjYXRhbg0KYGBgDQoNCiMjIyBOdWV2byBMZcOzbg0KDQpgYGB7cn0N
CmRzX251ZXZvX2xlb24gPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT0iTnVldm8gTGXDs24iKSB8PiANCiAgICBjb2xsZWN0KCkNCg0KZHNfbnVldm9fbGVvbg0KYGBgDQoNCiMjIyBUb3RhbCBOYWNpb25hbA0KDQpgYGB7cn0NCmRzX25hY2lvbmFsIDwtIG9wZW5fZGF0YXNldChoZXJlKCJkYXRhIiwgInByb2Nlc3NlZCIsICJwYXJxdWV0X2RhdGEiKSkgfD4NCiAgICBmaWx0ZXIoTk9NX0VOVD09IlRvdGFsIG5hY2lvbmFsIikgfD4gDQogICAgY29sbGVjdCgpDQoNCmRzX25hY2lvbmFsDQpgYGANCg0KIyMjIFZlcmlmeSBkYXRhc2V0cyBhcmUgbm90IGVtcHR5DQoNCmBgYHtyfQ0KDQpmb3IodmFsdWUgaW4gZW50aXRpZXNfY3N2KSB7DQogIA0KICByZWFkX2RmcyA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PXZhbHVlKSB8Pg0KICAgIGNvbGxlY3QoKQ0KDQogIGlmIChucm93KHJlYWRfZGZzKSA9PSAwKSB7DQogICAgcHJpbnQocGFzdGUoIkRhdGFzZXQgaXMgZW1wdHkiLCB2YWx1ZSkpDQogIH0gZWxzZSB7DQogICAgICAgIHByaW50KHBhc3RlKCJPSyIsIHZhbHVlLCBucm93KHJlYWRfZGZzKSkpDQoNCiAgfQ0KDQp9DQpgYGANCg0KIyMjIENvb3JkaW5hdGUgTGFiDQoNCmBgYHtyfQ0KbG9uZ2l0dWRlcyA8LSBzZWxlY3RlZF9kZiRMT05HSVRVRA0KbGF0aXR1ZGVzIDwtIHNlbGVjdGVkX2RmJExBVElUVUQNCmBgYA0KDQpgYGB7cn0NCnRlc3RfbG9uZyA8LSBsb25naXR1ZGVzWzhdDQp0ZXN0X2xvbmcNCmBgYA0KDQpgYGB7cn0NCnNlY3Rpb25zIDwtIHVubGlzdChzdHJzcGxpdCh0ZXN0X2xvbmcsICJbwrAnXCIgXSIpKQ0KZGVncmVlcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzFdKQ0KbWludXRlcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzJdKQ0Kc2Vjb25kcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzNdKQ0KDQpkZWNpbWFsX2RlZ3JlZXMgPC0gKGRlZ3JlZXMgKyBtaW51dGVzIC8gNjAgKyBzZWNvbmRzIC8gMzYwMCkgKiAtMQ0KZGVjaW1hbF9kZWdyZWVzDQpgYGANCg0KYGBge3J9DQpsb25naXR1ZGVfdG9fZGVjaW1hbCA8LSBmdW5jdGlvbih0ZXN0X2xvbmcpIHsNCiAgICBpZiAoaXMubmEodGVzdF9sb25nKSkgew0KICAgIHJldHVybihOQSkgIA0KICAgIH0NCiAgDQogIHNlY3Rpb25zIDwtIHVubGlzdChzdHJzcGxpdCh0ZXN0X2xvbmcsICJbwrAnXCIgXSIpKQ0KICANCiAgZGVncmVlcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzFdKQ0KICBtaW51dGVzIDwtIGFzLm51bWVyaWMoc2VjdGlvbnNbMl0pDQogIHNlY29uZHMgPC0gYXMubnVtZXJpYyhzZWN0aW9uc1szXSkNCg0KICANCiAgZGVjaW1hbF9kZWdyZWVzIDwtIChkZWdyZWVzICsgbWludXRlcyAvIDYwICsgc2Vjb25kcyAvIDM2MDApICogLTENCn0NCg0KbGF0aXR1ZGVf
dG9fZGVjaW1hbCA8LSBmdW5jdGlvbih0ZXN0X2xhdCkgew0KICAgIGlmIChpcy5uYSh0ZXN0X2xhdCkpIHsNCiAgICByZXR1cm4oTkEpICANCiAgICB9DQogIA0KICBzZWN0aW9ucyA8LSB1bmxpc3Qoc3Ryc3BsaXQodGVzdF9sYXQsICJbwrAnXCIgXSIpKQ0KICANCiAgZGVncmVlcyA8LSBhcy5udW1lcmljKHNlY3Rpb25zWzFdKQ0KICBtaW51dGVzIDwtIGFzLm51bWVyaWMoc2VjdGlvbnNbMl0pDQogIHNlY29uZHMgPC0gYXMubnVtZXJpYyhzZWN0aW9uc1szXSkNCg0KICANCiAgZGVjaW1hbF9kZWdyZWVzIDwtIChkZWdyZWVzICsgbWludXRlcyAvIDYwICsgc2Vjb25kcyAvIDM2MDApDQp9DQpgYGANCg0KYGBge3J9DQpzZWxlY3RlZF9jbGVhbiA8LSBzZWxlY3RlZF9kZiB8PiANCiAgICAgIG11dGF0ZShsb25naXR1ZGVfZGVjaW1hbCA9IHNhcHBseShMT05HSVRVRCwgbG9uZ2l0dWRlX3RvX2RlY2ltYWwpLA0KICAgICAgICAgICAgIGxhdGl0dWRlX2RlY2ltYWwgPSBzYXBwbHkoTEFUSVRVRCwgbGF0aXR1ZGVfdG9fZGVjaW1hbCkpDQoNCnNlbGVjdGVkX2NsZWFuDQpgYGANCg0KIyMjIEV4cG9ydGluZyBjbGVhbg0KDQpgYGB7cn0NCnRhYmxlIDwtIGFycm93OjpUYWJsZSRjcmVhdGUoc2VsZWN0ZWRfY2xlYW4pDQoNCm91dHB1dF9kaXIgPC0gaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhX2Nvb3JkcyIpDQoNCmFycm93Ojp3cml0ZV9kYXRhc2V0KHRhYmxlLCBvdXRwdXRfZGlyLCBwYXJ0aXRpb25pbmcgPSBjKCJOT01fRU5UIiwgIkVOVElEQUQiKSwgZXhpc3RpbmdfZGF0YV9iZWhhdmlvciA9ICJvdmVyd3JpdGUiKQ0KYGBgDQoNCiMjIyBWZXJpZnkgdW5pcXVlIGNpdGllcyBwZXIgU3RhdGUNCg0KYGBge3J9DQpmb3IodmFsdWUgaW4gZW50aXRpZXNfY3N2KSB7DQogIA0KICByZWFkX2RmcyA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PXZhbHVlKSB8Pg0KICAgIGNvbGxlY3QoKQ0KDQogIGlmIChucm93KHJlYWRfZGZzKSA9PSBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJE5PTV9MT0MpKSkgew0KICAgIHByaW50KHBhc3RlKCJMb2NhbGl0aWVzIFVuaXF1ZSIsIHZhbHVlKSkNCiAgfSBlbHNlIHsNCiAgICAgICAgcHJpbnQocGFzdGUoIk5PVCBPSyIsIHZhbHVlLCBucm93KHJlYWRfZGZzKSwgIjw+IiwgbGVuZ3RoKHVuaXF1ZShyZWFkX2RmcyROT01fTE9DKSkpKQ0KDQogIH0NCg0KfQ0KYGBgDQoNCmBgYHtyfQ0KZm9yKHZhbHVlIGluIGVudGl0aWVzX2Nzdikgew0KICANCiAgcmVhZF9kZnMgPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT12YWx1ZSkgfD4NCiAgICBjb2xsZWN0KCkNCiAgDQogICAgcmVhZF9kZnMkTk9NX01VTl9MT0MgPC0gcGFzdGUocmVhZF9kZnMkTk9NX01VTiwgcmVhZF9kZnMkTk9NX0xPQywg
c2VwID0gIl8iKQ0KDQogIGlmIChucm93KHJlYWRfZGZzKSA9PSBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJE5PTV9NVU5fTE9DKSkpIHsNCiAgICBwcmludChwYXN0ZSgiTG9jYWxpdGllcyBVbmlxdWUiLCB2YWx1ZSkpDQogIH0gZWxzZSB7DQogICAgICAgIHByaW50KHBhc3RlKCJOT1QgT0siLCB2YWx1ZSwgbnJvdyhyZWFkX2RmcyksICI8PiIsIGxlbmd0aCh1bmlxdWUocmVhZF9kZnMkTk9NX01VTl9MT0MpKSkpDQoNCiAgfQ0KDQp9DQpgYGANCg0KYGBge3J9DQpmb3IodmFsdWUgaW4gZW50aXRpZXNfY3N2KSB7DQogIA0KICByZWFkX2RmcyA8LSBvcGVuX2RhdGFzZXQoaGVyZSgiZGF0YSIsICJwcm9jZXNzZWQiLCAicGFycXVldF9kYXRhIikpIHw+DQogICAgZmlsdGVyKE5PTV9FTlQ9PXZhbHVlKSB8Pg0KICAgIGNvbGxlY3QoKQ0KICANCiAgICByZWFkX2RmcyROT01fTE9DX0xPQyA8LSBwYXN0ZShyZWFkX2RmcyRMT0MsIHJlYWRfZGZzJE5PTV9MT0MsIHNlcCA9ICJfIikNCg0KICBpZiAobnJvdyhyZWFkX2RmcykgPT0gbGVuZ3RoKHVuaXF1ZShyZWFkX2RmcyROT01fTE9DX0xPQykpKSB7DQogICAgcHJpbnQocGFzdGUoIkxvY2FsaXRpZXMgVW5pcXVlIiwgdmFsdWUpKQ0KICB9IGVsc2Ugew0KICAgICAgICBwcmludChwYXN0ZSgiTk9UIE9LIiwgdmFsdWUsIG5yb3cocmVhZF9kZnMpLCAiPD4iLCBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJE5PTV9MT0NfTE9DKSkpKQ0KDQogIH0NCg0KfQ0KYGBgDQoNCmBgYHtyfQ0KZm9yKHZhbHVlIGluIGVudGl0aWVzX2Nzdikgew0KICANCiAgcmVhZF9kZnMgPC0gb3Blbl9kYXRhc2V0KGhlcmUoImRhdGEiLCAicHJvY2Vzc2VkIiwgInBhcnF1ZXRfZGF0YSIpKSB8Pg0KICAgIGZpbHRlcihOT01fRU5UPT12YWx1ZSkgfD4NCiAgICBjb2xsZWN0KCkNCiAgDQogICAgcmVhZF9kZnMkTE9DX01VTiA8LSBwYXN0ZShyZWFkX2RmcyRMT0MsIHJlYWRfZGZzJE1VTiwgc2VwID0gIl8iKQ0KDQogIGlmIChucm93KHJlYWRfZGZzKSA9PSBsZW5ndGgodW5pcXVlKHJlYWRfZGZzJExPQ19NVU4pKSkgew0KICAgIHByaW50KHBhc3RlKCJMb2NhbGl0aWVzIFVuaXF1ZSIsIHZhbHVlKSkNCiAgfSBlbHNlIHsNCiAgICAgICAgcHJpbnQocGFzdGUoIk5PVCBPSyIsIHZhbHVlLCBucm93KHJlYWRfZGZzKSwgIjw+IiwgbGVuZ3RoKHVuaXF1ZShyZWFkX2RmcyRMT0NfTVVOKSkpKQ0KDQogIH0NCg0KfQ0KYGBgDQoNCiMjIyBDb25jbHVzaW9uDQoNCiMjIyMgQXBwZW5kIGNvZGUgdG8gTVVOIGFuZCBMT0MNCg==